* Open the log for this do-file.
* A log left open by an earlier (possibly aborted) run would make -log using-
* stop with an error, so close it first; -capture- swallows the error that
* -log close- raises when no log is open.
capture log close
log using DataPreparationlog , replace


**************************************************************************************
*Stata do-file for:
*How Education Policies Shape Political Inequality: Analyzing Policy Feedback Effects in Germany 
***
*Data Preparation Do File
**************************************************************************************


* Written for Stata version 18.
* User-written packages used by this do-file.
* Packages hosted on SSC are (re)installed automatically:
foreach pkg in fre panelview labutil sencode iscogen blindschemes {
	ssc install `pkg', replace
}
* The remaining packages are only searched for here -- install manually!
foreach pkg in gr0075 grc1leg {
	search `pkg'
}



*** TYPOLOGY ***
****************

* Start from an empty session and clear the Results window.
clear all
cls


* Output folder for exported graphs; referenced as $erg in the graph export commands below.
global erg "..\Results\Final"

************************************************
*** ADD ADDITIONAL MACRO DATA ******************
************************************************

*****
***Federal State Election Results
*****

*data obtained from christian endt.
*cross-checked and with information of federal returning officer (Bundeswahlleiterin). Few entries corrected.

* Build ltw.dta: a state-by-year panel of state-election results in which the
* years between two elections carry the result of the most recent election.
import excel using MacroData\LTWs.xlsx, firstrow clear 
gen year = year(Wahltag)

sort Land year
egen BuLa = group(Land)

* Number the elections within each state-year in chronological order.
* Sorting on Wahltag inside the by-group matters: Stata leaves tied
* observations in arbitrary order, so without it "obs == 1" is not guaranteed
* to be the earlier of two elections held in the same year.
bysort BuLa year (Wahltag): gen obs = _n
bysort BuLa year (Wahltag): gen Obs = _N

tab obs
br if obs == 2 //
*2 elections in hamburg 1982. new election after 6 months due to failed government formation -> use 2nd election, drop first.

drop if Obs == 2 & obs == 1
drop obs Obs
xtset BuLa year

* Insert the non-election years, then carry the last election result forward.
tsfill 
br
foreach var of varlist Wahltag Wahlbeteiligung CDUCSU SPD FDP Gruene Linke AfD {
	* explicit (year) ordering so that [_n-1] is always the previous year of the same state
	bysort BuLa (year): replace `var' = `var'[_n-1] if `var' == . & `var'[_n-1] != .
}
* NOTE(review): "CDUCSCUvote" looks like a typo for "CDUCSUvote"; the name is
* kept unchanged because code outside this section may refer to it.
rename CDUCSU CDUCSCUvote
rename SPD SPDvote
rename FDP FDPvote
rename Gruene Gruenevote
rename Linke Linkevote
rename AfD AfDvote


* State names are empty in the rows added by -tsfill-; fill them within state.
bysort BuLa (year): replace Land = Land[_n-1] if Land == "" & Land[_n-1] != ""

rename Land bula
br
fre year
drop if year > 2009 | year < 1949

* Harmonise state names with the other macro data sets (no umlauts, upper case).
replace bula = "Thueringen" if bula == "Thüringen"
replace bula = "Baden-Wuerttemberg" if bula == "Baden-Württemberg"
replace bula= ustrupper(bula)
save ltw.dta, replace

*****
***Unemployment Rate and Absolute Numbers
*****

* Build alo.dta: unemployment (absolute numbers "alo" and rate "alorate") by
* state and year.
* Each Excel sheet is transposed so that states become rows, the former
* year columns v1..vK are renamed to <stub><year>, the first row (the
* transposed first column of the sheet) is dropped, and the data are reshaped
* to long format.
foreach period in 5090 9122 {
	* number of year columns and offset such that column k is year offset + k
	if "`period'" == "5090" {
		local ncols  41
		local offset 1949
	}
	else {
		local ncols  32
		local offset 1990
	}
	foreach kind in abs rate {
		local stub = cond("`kind'" == "abs", "alo", "alorate")

		import excel using MacroData\Alo`period'`kind'.xlsx, firstrow clear

		xpose, clear varname
		rename _varname bula

		forvalues col = 1/`ncols' {
			local yr = `col' + `offset'
			rename v`col' `stub'`yr'
		}

		drop if _n == 1

		reshape long `stub', i(bula) j(year)
		save alo`period'`kind'.dta, replace
	}
}

*alo merge: absolute numbers + rates per period, then stack both periods
foreach period in 5090 9122 {
	use alo`period'abs.dta, clear
	merge 1:1 bula year using alo`period'rate.dta
	drop _merge
	save alo`period'.dta, replace
}

use alo5090.dta, clear
append using alo9122.dta
sort bula year

* Harmonise state names: the names in these files lack hyphens and contain umlauts.
replace bula = "Berlin" if bula == "BerlinWest"
replace bula = "Baden-Wuerttemberg" if bula == "BadenWürttemberg"
replace bula = "Thueringen" if bula == "Thüringen"
foreach pair in "Nordrhein Westfalen" "Rheinland Pfalz" "Schleswig Holstein" "Sachsen Anhalt" "Mecklenburg Vorpommern" {
	local first  : word 1 of `pair'
	local second : word 2 of `pair'
	replace bula = "`first'-`second'" if bula == "`first'`second'"
}

replace bula= ustrupper(bula)

fre year

drop if year > 2009 | year < 1949

save alo.dta, replace

* remove the intermediate files
foreach tmpfile in alo9122 alo5090 alo5090abs alo9122abs alo9122rate alo5090rate {
	erase `tmpfile'.dta
}

*****
***Helbig/Nikolai Data on Cabinet Composition
*****

* Build cabinets.dta: cabinet composition by state and year.
import excel using MacroData\cabinets.xlsx, firstrow clear
rename (Bundesland Jahr) (bula year)

* Harmonise state names with the other macro data sets (no umlauts, upper case).
replace bula = "Baden-Wuerttemberg" if bula == "Baden-Württemberg"
replace bula = "Thueringen" if bula == "Thüringen"
replace bula= ustrupper(bula)

fre year

* restrict to 1949-2009 (observations with missing year are dropped as well)
keep if inrange(year, 1949, 2009)

* mark the party variables as cabinet measures
foreach party in CDU SPD FDP GRÜN LINKE {
	rename `party' `party'cabinet
}

save cabinets.dta, replace



************************************************
*** Educational Policies ******************
************************************************
*Data coded based on Helbig & Nikolai: "Die Unvergleichbaren"
* Load the coded education-policy typology and store a working copy;
* the working copy is the file that later save commands write to.
use EduTypology.dta, clear

save Typology_Mar23.dta, replace

*****
***combine with other macro data
*****

fre bula

* Align state names with the macro data sets built above.
replace bula = "BERLIN" if bula == "EHEM. BERLIN-WEST"
replace bula = "MECKLENBURG-VORPOMMERN" if bula == "MECKL.BG.-VORPOMMERN"

* The macro data sets use "year" as key; the name is changed back after the merges.
rename year_school year

* Unemployment data; list the states of typology rows without a match.
merge 1:1 bula year using alo.dta
fre bula if _merge == 1
drop _merge
* State-election results; list the states of typology rows without a match.
merge 1:1 bula year using ltw.dta
br
fre bula if _merge == 1
drop _merge
* Cabinet composition.
merge 1:1 bula year using cabinets.dta

drop _merge

*variables for analysis:
fre alo // absolute number of unemployed
fre alorate // unemployment rate

fre SPDvote // spd election result
* option "missing": the total is missing (not 0) when all components are missing
egen leftvote = rowtotal(SPDvote Gruenevote Linkevote), missing // left parties election result

fre SPDcabinet // spd cabinet strength
egen leftcabinet = rowtotal(SPDcabinet GRÜNcabinet LINKEcabinet), missing // left parties cabinet strength

rename year year_school
fre bula
*****
***Recodings
*****

* TimePeriod: autocode() splits 1949-2009 into 5 equal-width intervals and
* returns each interval's upper bound; group() then renumbers them 1, 2, ...
fre year_school
gen TimePeriod = autocode(year_school,5,1949,2009)
tab year_school TimePeriod 
egen TimePeriod2 = group(TimePeriod)
fre TimePeriod2
drop TimePeriod
rename TimePeriod2 TimePeriod
fre TimePeriod

*1. Changing scales of existing variables to 0-1
*1.1 Share of students getting Abi in Gymnasium 
label variable anteil_gym "Gymnasium as share of all schools leading to Abitur"
fre anteil_gym
* Rescale the categories 0-3 to the 0-1 range:
*   0: >=95%; 0.33: 80-95%; 0.66: 60-80%; 1: <60%
recode anteil_gym (0=0) (1=0.33) (2=0.66) (3=1)
fre anteil_gym
label define manteil_gym 0 ">=95%" 1 "<60%" // problem labeling decimal values
label value anteil_gym manteil_gym
fre anteil_gym
rename anteil_gym share_gym
tab bula share_gym ,m
* The year variable is called year_school at this point (it was renamed back
* after the merges), so it is referenced by its full name here instead of
* relying on variable abbreviation.
tab year_school share_gym if bula == "SAARLAND",m // 1949-1956 missing
tab year_school share_gym if bula == "BADEN-WUERTTEMBERG",m // 1949-1951 missing
tab year_school share_gym if bula == "BERLIN",m // 1949-1951 missing


*1.2 Duration of primary education
fre duration_grund
gen duration_prim = duration_grund
* Rescale to the 0-1 range:
*   0 = 4 years; 0.5 = "Förder" (intermediate category); 1 = 6 years
recode duration_prim  (0=0) (1=0.5) (2=1) (3=1)
fre duration_prim 
label define mduration_grund 0 "4 years" 1 "6 years"
label value duration_prim mduration_grund
label var duration_prim "Duration Primary Schooling"
fre duration_prim 
tab bula duration_prim, missing
tab year_school duration_prim if bula == "SAARLAND",m // 1949-1955 (!) missing
tab year_school duration_prim if bula == "BADEN-WUERTTEMBERG",m // 1949-1951 missing
tab year_school duration_prim if bula == "BERLIN",m // 1949-1951 missing

*1.3 Comprehensive school form
fre gesamtsch // already in required scale
rename gesamtsch compr_school
tab bula compr_school, missing
* Set the indicator to 0 for school years 1949-1969 in the eleven states
* listed below. The list is split over two inlist() calls because inlist()
* accepts only a limited number of string arguments.
replace compr_school = 0 if inrange(year_school,1949,1969) & ///
	( inlist(bula, "BADEN-WUERTTEMBERG", "BAYERN", "BERLIN", "BREMEN", "HAMBURG", "HESSEN") | ///
	  inlist(bula, "NIEDERSACHSEN", "NORDRHEIN-WESTFALEN", "RHEINLAND-PFALZ", "SCHLESWIG-HOLSTEIN", "SAARLAND") )
tab bula compr_school, missing
*NO Missings except for former GDR because of manual coding range 1949-1969 as zero

* dummy for former GDR
*   1 = East German state in a school year before 1991
*   0 = East German state from 1991 on, or West German state (incl. Berlin)
* Berlin: bula was recoded from "EHEM. BERLIN-WEST" to "BERLIN" before the
* merges, so the West list has to match "BERLIN"; matching only the old name
* would leave GDR missing for every Berlin observation. The old name is still
* accepted in case it reappears in the data.
gen GDR = .
replace GDR = 1 if inlist(bula, "BRANDENBURG", "MECKLENBURG-VORPOMMERN", "SACHSEN", "SACHSEN-ANHALT", "THUERINGEN") & year_school < 1991
replace GDR = 0 if inlist(bula, "BADEN-WUERTTEMBERG", "BAYERN", "BREMEN", "HAMBURG", "HESSEN") | ///
	inlist(bula, "NIEDERSACHSEN", "NORDRHEIN-WESTFALEN", "RHEINLAND-PFALZ", "SAARLAND", "SCHLESWIG-HOLSTEIN") | ///
	inlist(bula, "BERLIN", "EHEM. BERLIN-WEST")
replace GDR = 0 if inlist(bula, "BRANDENBURG", "MECKLENBURG-VORPOMMERN", "SACHSEN", "SACHSEN-ANHALT", "THUERINGEN") & year_school > 1990
tab bula GDR, missing

***
* INDEX OF STRATIFICATION (main Independent Variable federal states - level)
***
* De-stratification index: mean of the three policy indicators, computed only
* when at least two of the three indicators are observed.
egen destrat_index = rowmean(duration_prim compr_school share_gym)
egen n_missing_parts = rowmiss(duration_prim compr_school share_gym)
replace destrat_index = . if n_missing_parts >= 2
drop n_missing_parts
label variable destrat_index "De-Stratification Index"
sum destrat_index , detail
fre destrat_index
*range 0-1; mean .35; median .33
tab bula destrat_index, missing

* no index values for the former GDR
replace destrat_index=. if GDR==1 

fre bula
tab destrat_index  if bula == "SACHSEN" & year_school < 1989,m // check
tab destrat_index  if bula == "SACHSEN" & year_school > 1989,m // check
*continuous measure of stratification without GDR

* 2 groups of Länder
*gen destrat_2gr=0 if destrat_index<=.239
*replace destrat_2gr=1 if destrat_index>.239
* median split; xtile codes the groups 1/2, shifted to 0/1 below
xtile destrat_2gr = destrat_index, nq(2) // <= 0.33; > 0.33; 55% - 45%
replace destrat_2gr = destrat_2gr - 1
tab destrat_2gr
tab destrat_index destrat_2gr 

replace destrat_2gr = . if missing(destrat_index)
label variable destrat_2gr "de-stratification, 2 groups"
label define ldestrat 0 "(more) stratified" 1 "(more) de-stratified" 2 "former GDR"
label values destrat_2gr ldestrat
tab bula destrat_2gr, missing
tab destrat_2gr

*build 5 cat index (quintiles of the continuous index):
xtile destrat_5gr = destrat_index, nq(5) 
fre destrat_5gr
tab destrat_index destrat_5gr 

* prepare variables for matching on year of entry into primary school, year of entry into secondary school, year of being age 14
foreach stage in primschool secschool age14 {
	gen year_`stage' = year_school
}

*****
***
*Descriptive Graphs (of Edu Sys Vars between states over time)
***
*****

***re-label federal states with english names
* Replace the German state names with English ones where they differ;
* names that are not listed keep their current spelling.
fre bula

replace bula = "BAVARIA" if bula == "BAYERN"
replace bula = "HESSE" if bula == "HESSEN"
replace bula = "LOWER SAXONY" if bula == "NIEDERSACHSEN"
replace bula = "NORTH RHINE-WESTPHALIA" if bula == "NORDRHEIN-WESTFALEN"
replace bula = "RHINELAND-PALATINATE" if bula == "RHEINLAND-PFALZ"
replace bula = "SAXONY" if bula == "SACHSEN"
replace bula = "SAXONY-ANHALT" if bula == "SACHSEN-ANHALT"
* NOTE(review): "THURINGA" looks like a misspelling of "THURINGIA". It is left
* unchanged because the value may be used as a matching key elsewhere -- confirm
* before correcting.
replace bula = "THURINGA" if bula == "THUERINGEN"

fre bula

**Descriptive Graph for Educational Systems
* Descriptive panel plots of the education-policy measures by state and
* school year. For each measure a copy is made in which former-GDR
* observations get the code 89, which is then recoded to 0 so that the GDR is
* the lowest plotted category. Each plot is exported to $erg.
* Fixes relative to the earlier version: legend typos ("Destratifiying",
* stray ")" after "No Comprehensive School") and a missing blank before
* scheme() in the share_gym plot; the long commands are wrapped with ///.
gen destrat_2gr_GDR = destrat_2gr
replace destrat_2gr_GDR = 89 if GDR == 1
fre destrat_2gr_GDR
*for plotting: GDR as lowest value -> lightest color; less stratified as highest value = darkest color
gen plot_destrat_GDR = destrat_2gr_GDR
recode plot_destrat_GDR (89=0) (0 = 1) (1 = 2)
fre plot_destrat_GDR
graph drop _all
panelview plot_destrat_GDR, i(bula) t(year_school) type(treat) xlabdist(5) theme(bw) ///
	graphregion(fcolor(white)) ///
	legend(label(1 "Former GDR") label(2 "Stratifying Policies") label(3 "Destratifying Policies") pos(6) row(1)) ///
	ytitle("German Länder (Federal States)", size(vsmall)) xtitle("Year", size(vsmall)) ///
	title("(De-)stratifying education policies in the German Länder," "school years 1949/50-2009/10" "Binary Measure", size(vsmall) color(black)) ///
	scheme(plotplain) graphregion(margin(small))

graph export "$erg\Appendix\Descriptive_EduSys.pdf", replace 

*for better understanding: create same graph for each part of the index
fre duration_prim
gen duration_prim_GDR = duration_prim
replace duration_prim_GDR = 89 if GDR == 1
fre duration_prim_GDR
* for plotting: GDR lowest value; less stratified highest value
recode duration_prim_GDR (89 = 0) (0 = 1) (0.5 = 2) (1 = 3), gen(plot_duration_prim)
tab duration_prim_GDR plot_duration_prim,m
label var duration_prim_GDR "Duration Primary Schooling"
fre duration_prim_GDR
fre duration_prim
panelview plot_duration_prim, i(bula) t(year_school) type(treat) xlabdist(5) ///
	graphregion(fcolor(white)) ///
	legend(label(1 "Former GDR") label(2 "4 years") label(3 "4 + 2 Years") label(4 "6 Years") pos(6) row(1)) ///
	ytitle("German Länder (Federal States)", size(vsmall)) xtitle("Year", size(vsmall)) ///
	title("(De-)stratifying education policies in the German Länder," "school years 1949/50-2009/10" "Duration Primary Schooling", size(vsmall)) ///
	scheme(plotplain) theme(bw) graphregion(margin(small))
tab bula duration_prim 

graph export "$erg\Appendix\Descriptive_EduSys_IndexPart1.pdf", replace 

gen compr_school_GDR = compr_school
replace compr_school_GDR = 89 if GDR == 1
fre compr_school_GDR
fre compr_school
recode compr_school_GDR (0= 1) (1=2) (89 = 0), gen(plot_compr_school)
panelview plot_compr_school, i(bula) t(year_school) type(treat) xlabdist(5) theme(bw) ///
	graphregion(fcolor(white)) ///
	legend(label(1 "Former GDR") label(2 "No Comprehensive School") label(3 "Comprehensive School")  pos(6) row(1)) ///
	ytitle("German Länder (Federal States)", size(vsmall)) xtitle("Year", size(vsmall)) ///
	title("(De-)stratifying education policies in the German Länder," "school years 1949/50-2009/10" "Comprehensive School Form", size(vsmall) color(black)) ///
	scheme(plotplain) graphregion(margin(small))

graph export "$erg\Appendix\Descriptive_EduSys_IndexPart2.pdf", replace 

gen share_gym_GDR = share_gym
replace share_gym_GDR = 89 if GDR == 1
fre share_gym_GDR
fre share_gym

recode share_gym_GDR (89 = 0) (0=1) (0.33 = 2) (0.66 = 3) (1= 4), gen(plot_share_gym) 
fre plot_share_gym
* check that every share_gym category ended up in its own plot category
tab plot_share_gym share_gym_GDR
panelview plot_share_gym, i(bula) t(year_school) type(treat) xlabdist(5) theme(bw) ///
	graphregion(fcolor(white)) ///
	legend(label(1 "Former GDR") label(2 ">= 95%") label(3 "80-95%") label(4 "60-80%") label(5 "< 60%") pos(6) row(1) title(" ")) ///
	ytitle("German Länder (Federal States)", size(vsmall)) xtitle("Year", size(vsmall)) ///
	title("(De-)stratifying education policies in the German Länder," "school years 1949/50-2009/10"  "Share of Gymnasium of all schools leading to Abitur", size(vsmall) color(black)) ///
	scheme(plotplain) graphregion(margin(small))

graph export "$erg\Appendix\Descriptive_EduSys_IndexPart3.pdf", replace 

***Descriptive Graph for continuous outcome (continuous stratification index)

panelview destrat_5gr, i(bula) t(year_school) type(treat) xlabdist(5) theme(bw) ///
	graphregion(fcolor(white)) ///
	legend(label(1 "Stratifying Policies") label(2 " ") label(3 " ") label(4 " ")  label(5 "De-Stratifying Policies") pos(6) row(1) title(" ")) ///
	ytitle("German Länder (Federal States)", size(vsmall)) xtitle("Year", size(vsmall)) ///
	title("(De-)stratifying education policies in the German Länder," "school years 1949/50-2009/10", size(vsmall) color(black)) ///
	scheme(plotplain) ///
	note("Continuous measure of (de-)stratifying education policies aggregated into 5 categories for graphical display." "Original index more fine-grained." "Educational Policies in former GDR as missing (white)", size(tiny)) ///
	graphregion(margin(small))

graph export "$erg\Descriptive_EduSys_5grCont.pdf", replace 
graph export "$erg\Descriptive_EduSys_5grCont.svg", replace 

* Copy of the state name under the name bulayouth.
gen bulayouth=bula 
fre bulayouth

******
*Intermediate result
******

*basically no variation (especially over time, but also between federal states) regarding
*  - abitur_all (yes/no whether it's possible to get abitur at all schools)
*-> exclude from index!

*****
***
*decompose Edu System Variables into Between & Within for different observation periods
***
*****

* numeric state identifier used as the grouping variable below
egen bulay_nr = group(bula)
tab bula bulay_nr

**
fre destrat_2gr

* from here on the binary index is called mod_edsys
rename destrat_2gr mod_edsys

fre duration_prim share_gym compr_school
drop duration_prim_GDR share_gym_GDR compr_school_GDR

*the single policy indicators duration_prim and share_gym need to be dichotomized
replace duration_prim = 1 if duration_prim == 0.5
fre share_gym
* range comparison instead of == because 0.33 and 0.66 have no exact binary representation
replace share_gym = 1 if share_gym > 0.32 & share_gym < 0.67 // recoding of middle-values 0.33 and 0.66
fre share_gym

*for each of the educational system variables + pol/econ macro control vars:
*  B_<var> = state mean over all years (between component)
*  W_<var> = deviation of the observation from its state mean (within component)
foreach sysvar in mod_edsys destrat_index destrat_5gr duration_prim compr_school share_gym alo alorate SPDvote leftvote SPDcabinet leftcabinet {
*** for all years:
bysort bulay_nr: egen B_`sysvar'=mean(`sysvar')
gen W_`sysvar'=`sysvar'-B_`sysvar'
}

* Variable labels for the between (B_) and within (W_) components; both sets
* share the same wording apart from the "Between:" / "Within:" prefix.
foreach part in B W {
	local kind = cond("`part'" == "B", "Between", "Within")

	label variable `part'_mod_edsys "`kind': Binary De-stratification Index"
	label variable `part'_destrat_index "`kind': De-Strat. Index"
	label variable `part'_destrat_5gr "`kind': De-Stratification Index"

	label variable `part'_duration_prim "`kind': Duration Primary Schooling"
	label variable `part'_compr_school "`kind': Comprehensive School Form"
	label variable `part'_share_gym "`kind': Gymnasium as % of Schools w. Abitur"

	label variable `part'_alo "`kind': Absolute Unemployed Individuals "
	label variable `part'_alorate "`kind': Unemployment Rate "
	label variable `part'_SPDvote "`kind': SPD vote share "
	label variable `part'_leftvote "`kind': Left parties' vote share"
	label variable `part'_SPDcabinet "`kind': SPD cabinet strength"
	label variable `part'_leftcabinet "`kind': Left parties' cabinet strength"
}

* Labels of the (dichotomized) source indicators themselves.
label variable mod_edsys "Binary De-stratification Index"
label variable duration_prim "Duration Primary Schooling"
label variable compr_school "Comprehensive School Form"
label variable share_gym "Gymnasium as % of Schools w. Abitur"


drop bulay_nr

* Visual check of the decomposition. The binary index was renamed from
* destrat_2gr to mod_edsys earlier in this do-file, so it is browsed under its
* current name (the old name would only resolve, via variable abbreviation,
* to destrat_2gr_GDR).
br bula year_school mod_edsys B_mod_edsys W_mod_edsys

* Write the prepared macro data back to the working copy. The file name is
* given explicitly instead of relying on the last-used file name.
save Typology_Mar23.dta, replace



*** ALLBUS *** 
**************

* Load the ALLBUS data file.
use "ZA5274_v1-1-0.dta", clear

*fre tps68 mps68 isei68 gld68 isco88 siops88 mps88 isei88 isco08 siops08 isei08 eseg
*use isei88, since it covers the longest time span (not observed 1980-1998, isei68 not observed 1980-1998 + 2012-2018; isei08 not observed 1980-2012 )

*create reduced dataset with variables we actually use:
keep year respid pa01 pa02a pa04 pv03 pp24-pp41 pe01-pe08 id01-id03 im01 de01 de05-de15 iscd975 iscd11 sex age work miscd975 fiscd975 misei88 fisei88 pv02 dg10 yborn land dg03 educ feduc meduc hhinc inc isei88 isco88 dw01 dw02 dw03 mde01 mde05-mde15 fde01 fde05-fde15 german

* store the reduced data set as the ALLBUS working file
save Allbus_prep_Mar23.dta, replace


* Convert the negative missing-value codes of the Allbus into Stata extended
* missing values (.a - .z) while keeping their value-label text.
* For every numeric variable in the data set:
*   1. look up the value label attached to the variable (it is not assumed to
*      carry the same name as the variable);
*   2. for each negative code that occurs in the data and is listed below,
*      copy its label text to the corresponding extended missing value;
*   3. -mvdecode- then changes the negative codes to those missing values.
* String variables are skipped.
* The two lists are parallel: the k-th code is mapped to the k-th letter.
local misscodes   -1 -6 -7 -8 -9 -10 -11 -12 -13 -14 -32 -33 -34 -41 -50 -51 -52 -53 -54 -55 -56 -57 -58 -59 -88
local missletters a b c d e f g h i j k l m n o p q r s t u v w x z

foreach var of varlist _all {
	capture confirm numeric variable `var'
	if _rc continue

	* name of the value label attached to this variable (empty if none)
	local lblname : value label `var'

	qui count if `var' < 0
	if r(N) > 0 & "`lblname'" != "" {
		qui levelsof `var' if `var' < 0, local(mlist)
		foreach code of local mlist {
			* position of this code in the list; 0 = not one of the listed codes
			local pos : list posof "`code'" in misscodes
			if `pos' == 0 continue

			* label text of the negative code; nothing to copy if it has none
			local content : label `lblname' `code', strict
			if `"`content'"' == "" continue

			local letter : word `pos' of `missletters'
			* compound quotes so that label text containing " does not break the command
			label define `lblname' .`letter' `"`content'"', modify
		}
	}

	qui mvdecode `var', mv(-1=.a \ -6=.b \ -7=.c \ -8=.d \ -9=.e ///
		\ -10=.f \ -11=.g \ -12=.h \ -13=.i \ -14=.j ///
		\ -32=.k \ -33=.l \ -34=.m \ -41=.n ///
		\ -50=.o \ -51=.p \ -52=.q \ -53=.r \ -54=.s \ -55=.t \ -56=.u \ -57=.v \ -58=.w \ -59=.x ///
		\ -88=.z)
}

*** CODING *** 
**************


* DEPENDENT VARIABLES
* political interest
* orig_polint: political interest as asked (pa02a), 1 = very ... 5 = not at all
gen orig_polint = pa02a
label variable orig_polint "Political Interest"
label define interest 1 "very interested" 2 "rather interested" 3 "interested" 4 "rather not interested" 5 "not at all interested"
label values orig_polint interest
tab year orig_polint, missing

* polint: reversed (higher = more interested) and rescaled to 0-1
gen polint = 6 - orig_polint
fre polint
replace polint = (polint-1) / 4
tab polint orig_polint
label variable polint "Political Interest"
label define revinterest 1 "very interested"  0 "not at all interested"
label values polint revinterest
fre polint 
*asked every year
*asked every year

** forms of participation ever used
* pp06-pp23: intention; pp24-41: self-reported participation in the past
* available 1988, 1998, 2008, 2018 
* in addition in 2002: participation in election, work in party, unlawful demonstration, demonstration, abstain, protest vote, petition
* for most forms of participation, they ask in addition whether one has used these forms in the past two years - however, this information is only available for waves 1998 & 2008 (and therefore not coded)

* Every item is copied from its pp-variable, gets a variable label and the
* value label "binary", and is tabulated by survey year.
* Each entry below reads: <new variable> <source variable> <variable label>.
* NOTE(review): no -label define binary- is visible in this part of the file;
* confirm that the label is defined elsewhere.
foreach spec in ///
	"opinion pp24 Told friends opinion" /// telling friends one's opinion
	"elect pp25 Participated in election" /// participation in elections
	"discuss pp26 Participated public discussion" /// public discussion
	"initiv pp27 Participated citizen initiative" /// citizen initiative
	"wrkprty pp28 Worked in party" /// work in party
	"suppcand pp29 Supported candidates" /// support candidates; only 1988, 1998!
	"unlawdem pp30 Participated unlawful demonstration" /// demonstration
	"occupy pp31 Participated occupation" /// occupy building/street or the like; only 1988, 1998!
	"trouble pp32 Made trouble demonstration" /// making trouble in demonstration; only 1988, 1998!
	"violpers pp33 Used violence against person" /// use violence against person; only 1988, 1998!
	"intimidate pp34 Intimidated opponents" /// intimidate opponents; only 1988, 1998!
	"demo pp35 Participated lawful demonstration" /// participate lawful demonstration; not in 1988
	"abstain pp36 Abstained out of protest" /// abstain from elections out of protest; not in 1988
	"protvote pp37 Protest voted" /// vote for another party than the preferred one out of protest; not in 1988
	"petition pp38 Signed petition" /// participate in petition (Unterschriftensammlung); not in 1988
	"plconsm pp40 Boy-/buycotted" /// political consumerism; only 2008, 2018
	"onlinep pp41 Protested online" { // online protest; only 2008, 2018

	* split the entry into new name, source name and label text
	gettoken newvar rest : spec
	gettoken source varlabel : rest
	local varlabel = strtrim(`"`varlabel'"')

	gen `newvar'=`source'
	* the abstain item is additionally shown in its original coding
	if "`newvar'" == "abstain" {
		fre pp36
	}
	label variable `newvar' "`varlabel'"
	label values `newvar' binary
	tab year `newvar' , missing
}

* indices of participation (total, conventional, unconventional)
* based on variables asked in 1998, 2008, 2018
* divided by number of items included, so all indices range from 0-1
* (presumably the items are coded 0/1 -- confirm against the codebook)
* Because the items are added with "+", an index is missing whenever any one
* of its items is missing.

* total: all ten items
gen part1=(opinion+elect+discuss+initiv+wrkprty+unlawdem+demo+abstain+protvote+petition)/10
label variable part1 "Participation Index"
tab year part1 , missing
*only available for 1998, 2008, 2018 because not all forms of participation were asked in 1988
sum part1

* conventional: election, party work, protest abstention, protest vote
gen convp1=(elect+wrkprty+abstain+protvote)/4
label variable convp1 "Conventional Participation Index"
tab year convp1 , missing
*years 1998, 2002, 2008, 2018
sum convp1


* unconventional: the remaining six items
gen unconvp=(opinion+discuss+initiv+unlawdem+demo+petition)/6
label variable unconvp "Unconventional Participation Index"
tab year unconvp , missing
*years 1998, 2008, 2018
sum unconvp


* political efficacy

*measured by the following single items 
*available for years 1988, 1998, 2008, 2018

fre pe01-pe08

* agree4: labels for the original 1-4 answer scale
* agree4_01: labels for items rescaled to 0-1. After rescaling, the value 1
* stands for "fully disagree", so keeping agree4 attached would label it
* "fully agree"; the rescaled items therefore get agree4_01 instead.
label define agree4 1 "fully agree" 2 "rather agree" 3 "rather disagree" 4 "fully disagree"
label define agree4_01 0 "fully agree" 1 "fully disagree"

*politicians don't care about people like me
gen polcare=pe01
label variable polcare "Politicians don't care about people like me (higher values = disagree)"
label values polcare agree4
tab year polcare , missing

replace polcare = (polcare - 1) / 3 // 0-1 scale
label values polcare agree4_01


*can assume active role in pol group (kept on the original 1-4 scale)
gen peactgrp=pe02
label variable peactgrp "Can assume active role in pol group"
label values peactgrp agree4
tab year peactgrp , missing


*politics is too complex
gen pecomplx=pe04
label variable pecomplx "Politics is too complex (higher values = disagree)"
label values pecomplx agree4
tab year pecomplx , missing

replace pecomplx = (pecomplx - 1) / 3 // 0-1 scale
label values pecomplx agree4_01


*politicians try to represent interests of the people (kept on the original 1-4 scale)
gen peprpres=pe05
label variable peprpres "Politicians try to represent interests of people"
label values peprpres agree4
tab year peprpres , missing
*not 1988

*I know rather little about politics
gen peknowl=pe06
label variable peknowl "Have little political knowledge (higher values = disagree)"
label values peknowl agree4
tab year peknowl , missing
*not 1988, 1998
replace peknowl = (peknowl - 1) / 3 // 0-1 scale
label values peknowl agree4_01


* indices for political efficacy, internal pol efficacy, external pol efficacy
* based on the three variables asked the most often, i.e., in 1988, 1998, 2008, 2018 (further variables available but not asked so often)
* divided by max sum of all constituting items so index ranges from 0-1

**Final Analyses will be based only on 2008 and 2018 (given availability of most items + bulayouth merge)
*-> I build new version of peff with 2 extpeff & 3 intpeff items (that are available in 08&18 and are clearly part of either intpeff & extpeff (unclear with pemostgr))

* The two items that are still on the 1-4 scale are reversed (1<->4, 2<->3)
* and then brought to the 0-1 scale.
foreach item in peactgrp peprpres {
	gen `item'_rev = `item'
	recode `item'_rev (1=4) (2=3) (3=2) (4=1)
	replace `item'_rev = (`item'_rev - 1) / 3 // 0-1 scale
}
label variable peactgrp_rev "Can assume active role in pol group"
label variable peprpres_rev "Politicians try to represent interests of people"

* internal efficacy: mean of three items
gen intpeff = (peactgrp_rev + pecomplx + peknowl) / 3
label variable intpeff "Internal Political Efficacy Index"
tab year intpeff , missing

* external efficacy: mean of two items
gen extpeff = (polcare + peprpres_rev) / 2
label variable extpeff "External Political Efficacy Index"
tab year extpeff , missing

* overall efficacy: mean of all five items
gen peff = (peactgrp_rev + pecomplx + peknowl + polcare + peprpres_rev) / 5
label variable peff "Political Efficacy Index"
tab year peff , missing

*final check
pwcorr peactgrp_rev pecomplx peknowl polcare peprpres_rev


** INDEPENDENT VARIABLE: EDUCATION 

*highest edu qualification
* The levels are assigned in ascending order, so a respondent with several
* qualifications ends up with the highest level.
fre de05-de15
gen education=. 
label variable education "highest professional qualification"
* 1: de05 = no vocational qualification
replace education = 1 if de05 == 1
* 2: de06 = on-the-job training period, de07 = partial skilled-worker certificate, de10 = industrial/trade apprenticeship
replace education = 2 if de06 == 1 | de07 == 1 | de10 == 1
* 3: de08 = commercial apprenticeship, de09 = internship
replace education = 3 if de08 == 1 | de09 == 1
* 4: de11 = technical school certificate, de12 = full-time vocational school certificate
replace education = 4 if de11 == 1 | de12 == 1
* 5: de13 = master craftsman (Meister)
replace education = 5 if de13 == 1
* 6: de14 = degree from a university of applied sciences (Fachhochschule)
replace education = 6 if de14 == 1
* 7: de15 = university degree
replace education = 7 if de15 == 1
label define edu1 1 "no qualification" 2 "basic VET" 3 "vocational qualification" 4 "vocational school qualification" 5 "higher vocational qualification (Meister)" 6 "higher education FH" 7 "higher education Uni"
label values education edu1

tab year education , missing
tab education iscd975, missing
tab education iscd11, missing

foreach item in de05 de06 de07 de15 {
	tab de01 `item',m
}


* three categories
gen edu3cat=.
label variable edu3cat "Occ. Quali."
replace edu3cat = 1 if inlist(education, 1, 2)
replace edu3cat = 2 if inlist(education, 3, 4, 5)
replace edu3cat = 3 if inlist(education, 6, 7)
label define edu2 1 "Occ. quali.: basic" 2 "Occ. quali.: vocational" 3 "Occ. quali.: academic"
label values edu3cat edu2
table edu3cat education
fre edu3cat

tab education de01,m

** Education based on the highest degree (respondent and parents)
fre educ feduc meduc

* Respondent: three-category version (codes 6 and 7 become missing) ...
recode educ ///
    (1 2 = 1 "Edu. degree: basic") ///
    (3 4 = 2 "Edu. degree: vocational") ///
    (5 = 3 "Edu. degree: Academic") ///
    (6 7 =.), gen(IEdu)

* ... and a detailed version that only blanks out codes 6 and 7.
recode educ (6 7 = .), gen(IEdu_detail)

* Father and mother: same three categories, code 6 becomes missing.
recode feduc ///
    (1 2 = 1 "Edu: basic") ///
    (3 4 = 2 "Edu: vocational") ///
    (5 = 3 "Edu: Academic") ///
    (6=.), gen(FEdu)

recode meduc ///
    (1 2 = 1 "Edu: basic") ///
    (3 4 = 2 "Edu: vocational") ///
    (5 = 3 "Edu: Academic") ///
    (6=.), gen(MEdu)


* Parental education = the higher of the two parents' categories.
* Start with the cases where both agree, then let the higher category win,
* and finally fall back on the one parent observed when the other is missing.
generate PEdu = FEdu if FEdu == MEdu
replace PEdu = 3 if (FEdu == 3 & MEdu < 3) | (MEdu == 3 & FEdu < 3)
replace PEdu = 2 if (FEdu == 2 & MEdu < 2) | (MEdu == 2 & FEdu < 2)
replace PEdu = FEdu if missing(MEdu)
replace PEdu = MEdu if missing(FEdu)

label variable IEdu "Highest educational degree"
label variable PEdu "Parents: highest educational degree"

label define pedu2 ///
    1 "Parental edu. degree: basic" ///
    2 "Parental edu. degree: vocational" ///
    3 "Parental edu. degree: academic"
label values PEdu pedu2
fre IEdu PEdu

** Parents: highest vocational qualification
* The father (prefix f) and mother (prefix m) items are processed by the same
* loop; the variable labels are kept in locals because they differ in wording.
local lab_f_quali "father: highest professional qualification"
local lab_m_quali "mother: professional qualification"
local lab_f_3cat  "father: professional qualification"
local lab_m_3cat  "mother: professional qualification"

foreach p in f m {
    * Seven-category qualification from the indicator items <p>de05-<p>de15.
    * Assignments run from lowest to highest, so the highest flagged item wins.
    fre `p'de05-`p'de15
    generate `p'education = .
    label variable `p'education "`lab_`p'_quali'"
    replace `p'education = 1 if `p'de05 == 1                                // no vocational qualification
    replace `p'education = 2 if `p'de06 == 1 | `p'de07 == 1 | `p'de10 == 1  // on-the-job training, partial skilled-worker certificate, trade apprenticeship
    replace `p'education = 3 if `p'de08 == 1 | `p'de09 == 1                 // commercial apprenticeship, internship
    replace `p'education = 4 if `p'de11 == 1 | `p'de12 == 1                 // technical school, full-time vocational school
    replace `p'education = 5 if `p'de13 == 1                                // master craftsman (Meister)
    replace `p'education = 6 if `p'de14 == 1                                // university of applied sciences degree
    replace `p'education = 7 if `p'de15 == 1                                // university degree
    label values `p'education edu1

    * Three-category version.
    generate `p'edu3cat = .
    label variable `p'edu3cat "`lab_`p'_3cat'"
    replace `p'edu3cat = 1 if inlist(`p'education, 1, 2)
    replace `p'edu3cat = 2 if inlist(`p'education, 3, 4, 5)
    replace `p'edu3cat = 3 if inlist(`p'education, 6, 7)
    label values `p'edu3cat edu2
    table `p'education `p'edu3cat
    fre `p'edu3cat
}

* Parents combined: the higher of the two categories; if one parent is
* missing, the other parent's category is used.
generate pedu3cat = fedu3cat if fedu3cat == medu3cat
replace pedu3cat = 3 if (fedu3cat == 3 & medu3cat < 3) | (medu3cat == 3 & fedu3cat < 3)
replace pedu3cat = 2 if (fedu3cat == 2 & medu3cat < 2) | (medu3cat == 2 & fedu3cat < 2)
replace pedu3cat = fedu3cat if missing(medu3cat)
replace pedu3cat = medu3cat if missing(fedu3cat)

label variable pedu3cat "Parents: occ. quali."
label define pedu3 ///
    1 "Parental occ. quali.: basic" ///
    2 "Parental occ. quali.: vocational" ///
    3 "Parental occ. quali.: academic"
label values pedu3cat pedu3
tabulate pedu3cat, missing
tabulate pedu3cat PEdu, missing

* TEST: (academic - basic mismatch)
* Copy of edu3cat in which respondents coded "no qualification"
* (education == 1, edu3cat == 1) are moved up according to their degree (educ).
capture drop TestEdu3cat
clonevar TestEdu3cat = edu3cat
label variable TestEdu3cat "corrected: occupational qualification"
fre educ
fre TestEdu3cat

* Condition shared by all three corrections.
local no_quali "edu3cat == 1 & education == 1"
replace TestEdu3cat = 3 if TestEdu3cat != 3 & educ == 5 & `no_quali' // Hochschulreife (higher-education entrance qualification)
replace TestEdu3cat = 2 if TestEdu3cat != 2 & educ == 4 & `no_quali' // Fachhochschulreife
replace TestEdu3cat = 2 if TestEdu3cat != 2 & educ == 3 & `no_quali' // Mittlere Reife (intermediate secondary certificate)

fre TestEdu3cat
table TestEdu3cat edu3cat, m
** INCOME
fre hhinc inc
* Household income (~13.000 missing) has somewhat fewer missing values than
* personal income (~15.000), but personal income is preferred for theoretical reasons.
* di09 (subjective feeling about income) is only available in 1994 & 2014.

* Income terciles computed separately within each survey year.
capture drop inc_cat
capture drop inc_help
generate inc_cat = .
levelsof year, local(survey_years)

foreach yr of local survey_years {
    * inc_help holds the terciles of the current year only; it is rebuilt on every pass.
    capture drop inc_help
    xtile inc_help = inc if year == `yr', nq(3)
    replace inc_cat = inc_help if year == `yr'
}

fre inc_cat
label define inclab 1 "Low Income" 2 "Middle Income" 3 "High Income"
label values inc_cat inclab

* Test: recompute the 1990 terciles directly and compare them with the loop result.
xtile inctest1990 = inc if year == 1990, nq(3)
tabulate inctest1990 inc_cat if year == 1990, missing // works
drop inctest1990 inc_help

** OCCUPATIONAL STATUS (ISEI88)
fre isei88
bysort year: summarize isei88 // observed from 1999 onwards ...

* Same logic as for income: three categories, built separately per year.
capture drop isei_cat
capture drop isei_help
generate isei_cat = .

* Biennial waves 2000, 2002, ..., 2018.
foreach yr of numlist 2000(2)2018 {
    * isei_help holds the terciles of the current year only; it is rebuilt on every pass.
    capture drop isei_help
    xtile isei_help = isei88 if year == `yr', nq(3)
    replace isei_cat = isei_help if year == `yr'
}

fre isei_cat

* Test: recompute the 2000 terciles directly and compare them with the loop result.
xtile iseitest2000 = isei88 if year == 2000, nq(3)
tabulate iseitest2000 isei_cat if year == 2000, missing // works
drop iseitest2000 isei_help

*** Oesch Classes
fre isco88
fre dw01
fre dw02

* Helper inputs for iscogen, derived from dw01 and dw02.
* The missing codes .c .e .f .n are set to 0 in both helpers.
recode dw01 ///
    (1 3 = 1) ///
    (2 4/9 = 0) ///
    (.c .e .f .n = 0), gen(sempl)
recode dw02 ///
    (10/14 20 30/74 = 0) ///
    (15 16 21 22 = 1) ///
    (17 23 24 = 2) ///
    (.c .e .f .n = 0), gen(supvis)
fre supvis
bysort year: summarize isco88 // observed from 1992 on

* Five-class Oesch scheme via the iscogen package.
capture drop oeschclass
iscogen oeschclass = oesch5(isco88 sempl supvis), from(isco88)
fre oeschclass

* Collapse the five classes into three; note the new coding runs in reverse order.
recode oeschclass ///
    (1 2 = 3 "Service Class") ///
    (3 = 2 "Small Business Owners") ///
    (4 5 = 1 "Working Class"), gen(oesch3class)
tabulate oeschclass oesch3class

** CONTROL VARIABLES

* Gender: dummy that is 1 for sex == 2, 0 for sex == 1, and missing otherwise.
table sex
generate female = cond(sex == 2, 1, cond(sex == 1, 0, .))
label define fem 0 "male" 1 "female"
label values female fem
label variable female "female"
tabulate female sex

* Age and its square (the label is written in LaTeX math notation).
label variable age "Age"
generate age_sq = age * age
label variable age_sq "$(Age)^2$"


* Migration background: codes 1 and 2 of german -> 1, code 3 -> 0.
fre german
recode german (1 2 = 1) (3 = 0), gen(migrabg)
tabulate migrabg
label define mblab 0 "Non-German Nationality" 1 "German Nationality"
label values migrabg mblab

label variable migrabg "German Nationality"

* Bundesland during youth (only for waves 1991, 2006-2018)
* bulayouth is the string version of dg10 (its value labels) and is used
* further below as a merge key against the education-system (typology) data.
decode dg10, generate(bulayouth)
table bulayouth

** Names of federal states need to be in English and EXACT matches of the
** names in the edu system data (for matching ...).
* FIX: the translation is keyed on bulayouth itself. The previous version
* tested a different variable (bula), so the youth state was overwritten with
* the translated value of that other variable, and German-named youth states
* were left untranslated (and hence unmatched in the merge).
* Comparisons are exact and case-sensitive; this assumes the dg10 value labels
* are upper-case as written here - TODO confirm against `table bulayouth`.
replace bulayouth = "BAVARIA"                if bulayouth == "BAYERN"
replace bulayouth = "HESSE"                  if bulayouth == "HESSEN"
replace bulayouth = "LOWER SAXONY"           if bulayouth == "NIEDERSACHSEN"
replace bulayouth = "NORTH RHINE-WESTPHALIA" if bulayouth == "NORDRHEIN-WESTFALEN"
replace bulayouth = "RHINELAND-PALATINATE"   if bulayouth == "RHEINLAND-PFALZ"
replace bulayouth = "SAXONY"                 if bulayouth == "SACHSEN"
replace bulayouth = "SAXONY-ANHALT"          if bulayouth == "SACHSEN-ANHALT"
* NOTE(review): "THURINGA" looks like a misspelling of "THURINGIA". It is kept
* unchanged because it must equal the key used in the typology data - verify there.
replace bulayouth = "THURINGA"               if bulayouth == "THUERINGEN"
replace bulayouth = "BERLIN (WEST)"          if bulayouth == "EHEM. BERLIN-WEST"
replace bulayouth = "MECKLENBURG-VORPOMMERN" if bulayouth == "MECKLENB.-VORPOMMERN"



table bulayouth

* Attention: includes names such as "Datenfehler" (data error), "Sonstiges" (other) etc.
* (not relevant for merged datasets, as these will not be matched and thus deleted)


* Year of entry into secondary education: birth year plus ten.
generate year_secschool = yborn + 10
label variable year_secschool "year of entry into secondary school"

* Write the prepared data back to the file currently in use.
save, replace

*** MERGING ***
***************

* Keys: state during youth (bulayouth) and year of entry into secondary school.
* Many survey respondents map onto one typology row; only matched rows are kept.
use Allbus_prep_Mar23.dta
merge m:1 bulayouth year_secschool using Typology_Mar23.dta
keep if _merge == 3

fre bulayouth

* Numeric identifiers for the analyses (multilevel models, standard errors, fixed effects ...)
egen BuLaYouth = group(bulayouth)
tabulate bulayouth BuLaYouth

* One identifier per state-by-cohort cell.
egen bulacohort = group(bulayouth year_secschool)
summarize bulacohort

save Allbus_bulayouth_sec_Mar23.dta, replace

*****drop not-merged allbus & typology datasets



*************************************************
*** check outcome var availability per year ***
*************************************************
use Allbus_bulayouth_sec_Mar23, clear

* Cross-tabulate every outcome against the survey year.
* Availability as noted when this check was written: all items below were
* recorded as "2008 & 2018", except polint ("1991 & 2006-2018 (2yrs)").
foreach outcome in ///
    part1 convp1 elect wrkprty abstain protvote ///
    unconvp opinion discuss initiv unlawdem demo petition ///
    polint ///
    peff intpeff pecomplx peactgrp extpeff {
    tabulate year `outcome'
}


* Remove the intermediate files that went into the merged dataset.
erase Typology_Mar23.dta
erase Allbus_prep_Mar23.dta


* Education-system variables for observations with GDR == 1.
foreach sysvar in compr_school share_gym duration_prim destrat_index mod_edsys {
    tabulate `sysvar' if GDR == 1, missing
}
* all missing!

log close
